import pandas as pd


class FastaReader:
    """Parse a FASTA file into a mapping of record title -> sequence.

    Attributes:
        file: Path of the FASTA file that was read.
        message: The file object used for reading; it is closed once
            parsing has finished.
        group: Dict mapping each record title (header line without the
            leading ``>``) to its upper-cased sequence with line breaks
            removed. If a title occurs more than once, the last record wins.
        tittle_group: Record titles in file order (duplicates kept).
    """

    def __init__(self, file):
        """Read and parse ``file``.

        Lines starting with ``>`` open a new record; every following line
        up to the next header is upper-cased and appended to that record's
        sequence. Lines that appear before the first header belong to no
        record and are ignored.

        Args:
            file: Path of the FASTA file to read.

        Raises:
            OSError: If the file cannot be opened.
        """
        self.file = file  # file location
        self.group = {}  # title -> sequence
        self.tittle_group = []  # titles in file order

        title = None  # title of the record currently being read
        chunks = []  # sequence pieces of the current record

        # The context manager guarantees the handle is closed even if
        # parsing raises; the (closed) handle stays reachable as
        # ``self.message`` for backward compatibility.
        with open(self.file, 'r') as handle:
            self.message = handle
            for raw_line in handle:
                # Strip only the line terminator, so a header on the final
                # line without a trailing newline keeps its last character.
                line = raw_line.rstrip('\r\n')
                if line.startswith('>'):
                    # A new header closes the previous record, if any.
                    if title is not None:
                        self.group[title] = ''.join(chunks)
                    title = line[1:]
                    self.tittle_group.append(title)
                    chunks = []
                elif title is not None:
                    chunks.append(line.upper())

        # Flush the final record; this also covers a header that has no
        # sequence lines after it.
        if title is not None:
            self.group[title] = ''.join(chunks)

    def ToCsv(self, csv_name):
        """Write the records to ``csv_name`` as a two-column CSV file.

        The columns are ``tittle`` and ``gene_code``; no index column is
        written.

        Args:
            csv_name: Destination path (or buffer) handed to
                ``DataFrame.to_csv``.
        """
        frame = pd.DataFrame({
            'tittle': list(self.group.keys()),
            'gene_code': list(self.group.values()),
        })
        frame.to_csv(csv_name, index=False)

    def ToTxt(self, txt_name):
        """Write the titles and sequences to ``txt_name`` as GBK bytes.

        The output is the ``str()`` form of the dict's key view followed
        immediately by the ``str()`` form of its value view, with no
        separator between the two.

        Args:
            txt_name: Destination file path.

        Raises:
            UnicodeEncodeError: If a title or sequence contains a character
                that GBK cannot encode (encoding is strict).
        """
        with open(txt_name, 'wb') as out:
            out.write(str(self.group.keys()).encode('GBK', 'strict'))
            out.write(str(self.group.values()).encode('GBK', 'strict'))


if __name__ == '__main__':
    # Run the demo conversion only when executed as a script, so importing
    # this module does not read or write any files.
    reader = FastaReader('GCA_000167115.1_ASM16711v1_genomic.fna')
    reader.ToCsv('demo.csv')
